── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): state, fips
dbl (2): cases, deaths
date (1): date
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 52 Columns: 5
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (3): state, state_name, geo_id
dbl (2): population, pop_density
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
'data.frame': 58094 obs. of 9 variables:
$ state : chr "Alabama" "Alabama" "Alabama" "Alabama" ...
$ date : Date, format: "2023-01-04" "2020-04-25" ...
$ fips : chr "01" "01" "01" "01" ...
$ cases : num 1587224 6213 1638348 1549285 8691 ...
$ deaths : num 21263 213 21400 21129 343 ...
$ geo_id : chr "01" "01" "01" "01" ...
$ population : num 4887871 4887871 4887871 4887871 4887871 ...
$ pop_density: num 96.5 96.5 96.5 96.5 96.5 ...
$ abb : chr "AL" "AL" "AL" "AL" ...
3.
# format the datecv_states$date <-as.Date(cv_states$date, format="%Y-%m-%d")# format the state and state abbreviation (abb) variablesstate_list <-unique(cv_states$state)cv_states$state <-factor(cv_states$state, levels = state_list)abb_list <-unique(cv_states$abb)cv_states$abb <-factor(cv_states$abb, levels = abb_list)str(cv_states)
### FINISH THE CODE HERE # order the data first by state, second by datecv_states = cv_states[order(cv_states$state, cv_states$date),]# Confirm the variables are now correctly formattedstr(cv_states)
# Inspect the range values for each variable. What is the date range? The range of cases and deaths?head(cv_states)
state date fips cases deaths geo_id population pop_density abb
1029 Alabama 2020-03-13 01 6 0 01 4887871 96.50939 AL
597 Alabama 2020-03-14 01 12 0 01 4887871 96.50939 AL
282 Alabama 2020-03-15 01 23 0 01 4887871 96.50939 AL
12 Alabama 2020-03-16 01 29 0 01 4887871 96.50939 AL
266 Alabama 2020-03-17 01 39 0 01 4887871 96.50939 AL
78 Alabama 2020-03-18 01 51 0 01 4887871 96.50939 AL
summary(cv_states)
state date fips
Washington : 1158 Min. :2020-01-21 Length:58094
Illinois : 1155 1st Qu.:2020-12-06 Class :character
California : 1154 Median :2021-09-11 Mode :character
Arizona : 1153 Mean :2021-09-10
Massachusetts: 1147 3rd Qu.:2022-06-17
Wisconsin : 1143 Max. :2023-03-23
(Other) :51184
cases deaths geo_id population
Min. : 1 Min. : 0 Length:58094 Min. : 577737
1st Qu.: 112125 1st Qu.: 1598 Class :character 1st Qu.: 1805832
Median : 418120 Median : 5901 Mode :character Median : 4468402
Mean : 947941 Mean : 12553 Mean : 6397965
3rd Qu.: 1134318 3rd Qu.: 15952 3rd Qu.: 7535591
Max. :12169158 Max. :104277 Max. :39557045
pop_density abb
Min. : 1.292 WA : 1158
1st Qu.: 43.659 IL : 1155
Median : 107.860 CA : 1154
Mean : 423.031 AZ : 1153
3rd Qu.: 229.511 MA : 1147
Max. :11490.120 WI : 1143
NA's :1106 (Other):51184
min(cv_states$date)
[1] "2020-01-21"
max(cv_states$date)
[1] "2023-03-23"
4.
# Add variables for new_cases and new_deaths:for (i in1:length(state_list)) { cv_subset <-subset(cv_states, state == state_list[i]) cv_subset <- cv_subset[order(cv_subset$date),]# add starting level for new cases and deaths cv_subset$new_cases <- cv_subset$cases[1] cv_subset$new_deaths <- cv_subset$deaths[1]### FINISH THE CODE HEREfor (j in2:nrow(cv_subset)) { cv_subset$new_cases[j] = cv_subset$cases[j] - cv_subset$cases[j -1] cv_subset$new_deaths[j] = cv_subset$deaths[j] - cv_subset$deaths[j -1] }# include in main dataset cv_states$new_cases[cv_states$state==state_list[i]] = cv_subset$new_cases cv_states$new_deaths[cv_states$state==state_list[i]] = cv_subset$new_deaths}# Focus on recent datescv_states <- cv_states %>% dplyr::filter(date >="2021-06-01")### FINISH THE CODE HERE# Inspect outliers in new_cases using plotlyp1<-ggplot(cv_states, aes(x = date, y = new_cases, color = state)) +ylab("New Cases") +geom_point(size = .5, alpha =0.5)ggplotly(p1)
p1<-NULL# to clear from workspacep2<-ggplot(cv_states, aes(x = date, y = new_deaths, color = state)) +ylab("New Deaths") +geom_point(size = .5, alpha =0.5)ggplotly(p2)
p2<-NULL# to clear from workspace# set negative new case or death counts to 0cv_states$new_cases[cv_states$new_cases<0] =0cv_states$new_deaths[cv_states$new_deaths<0] =0# Recalculate `cases` and `deaths` as cumulative sum of updated `new_cases` and `new_deaths`for (i in1:length(state_list)) { cv_subset =subset(cv_states, state == state_list[i])# add starting level for new cases and deaths cv_subset$cases = cv_subset$cases[1] cv_subset$deaths = cv_subset$deaths[1]### FINISH CODE HEREfor (j in2:nrow(cv_subset)) { cv_subset$cases[j] = cv_subset$new_cases[j] + cv_subset$cases[j -1] cv_subset$deaths[j] = cv_subset$new_deaths[j] + cv_subset$deaths[j -1] }# include in main dataset cv_states$cases[cv_states$state==state_list[i]] = cv_subset$cases cv_states$deaths[cv_states$state==state_list[i]] = cv_subset$deaths}# Smooth new countscv_states$new_cases = zoo::rollmean(cv_states$new_cases, k=7, fill=NA,align='right') %>%round(digits =0)cv_states$new_deaths = zoo::rollmean(cv_states$new_deaths, k=7, fill=NA,align='right') %>%round(digits =0)# Inspect data again interactivelyp2<-ggplot(cv_states, aes(x = date, y = new_deaths, color = state)) +geom_line() +geom_point(size = .5, alpha =0.5)ggplotly(p2)
5.
### FINISH CODE HERE# add population normalized (by 100,000) counts for each variablecv_states$per100k =as.numeric(format(round(cv_states$cases/(cv_states$population/100000),1),nsmall=1))cv_states$newper100k =as.numeric(format(round(cv_states$new_cases/(cv_states$population/100000),1),nsmall=1))
# add a naive_CFR variable = deaths / casescv_states = cv_states %>%mutate(naive_CFR =round((deaths*100/cases),2))# create a `cv_states_today` variablecv_states_today =subset(cv_states, date==max(cv_states$date))
6.
# pop_density vs. casescv_states_today %>%plot_ly(x =~pop_density, y =~cases, type ='scatter', mode ='markers', color =~state,size =~population, sizes =c(5, 70), marker =list(sizemode='diameter', opacity=0.5))
Warning: Ignoring 1 observations
Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
Returning the palette you asked for with that many colors
Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
Returning the palette you asked for with that many colors
# filter out "District of Columbia"cv_states_today_filter <- cv_states_today %>%filter(state!="District of Columbia")# pop_density vs. cases after filteringcv_states_today_filter %>%plot_ly(x =~pop_density, y =~cases, type ='scatter', mode ='markers', color =~state,size =~population, sizes =c(5, 70), marker =list(sizemode='diameter', opacity=0.5))
Warning: Ignoring 1 observations
Warning: n too large, allowed maximum for palette Set2 is 8
Returning the palette you asked for with that many colors
Warning: n too large, allowed maximum for palette Set2 is 8
Returning the palette you asked for with that many colors
# pop_density vs. deathsper100kcv_states_today_filter %>%plot_ly(x =~pop_density, y =~deathsper100k,type ='scatter', mode ='markers', color =~state,size =~population, sizes =c(5, 70), marker =list(sizemode='diameter', opacity=0.5))
Warning: Ignoring 1 observations
Warning: n too large, allowed maximum for palette Set2 is 8
Returning the palette you asked for with that many colors
Warning: n too large, allowed maximum for palette Set2 is 8
Returning the palette you asked for with that many colors
# Adding hoverinfocv_states_today_filter %>%plot_ly(x =~pop_density, y =~deathsper100k,type ='scatter', mode ='markers', color =~state,size =~population, sizes =c(5, 70), marker =list(sizemode='diameter', opacity=0.5),hoverinfo ='text',text =~paste( paste(state, ":", sep=""), paste(" Cases per 100k: ", per100k, sep="") , paste(" Deaths per 100k: ", deathsper100k, sep=""), sep ="<br>")) %>%layout(title ="Population-normalized COVID-19 deaths (per 100k) vs. population density for US states",yaxis =list(title ="Deaths per 100k"), xaxis =list(title ="Population Density"),hovermode ="compare")
Warning: Ignoring 1 observations
Warning: n too large, allowed maximum for palette Set2 is 8
Returning the palette you asked for with that many colors
Warning: n too large, allowed maximum for palette Set2 is 8
Returning the palette you asked for with that many colors
7.
p <-ggplot(cv_states_today_filter, aes(x=pop_density, y=deathsper100k,size=population)) +geom_point() +geom_smooth()ggplotly(p)
`geom_smooth()` using method = 'loess' and formula = 'y ~ x'
Warning: The following aesthetics were dropped during statistical transformation: size
ℹ This can happen when ggplot fails to infer the correct grouping structure in
the data.
ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
variable into a factor?
plot_ly(cv_states, x =~date, y =~naive_CFR, color =~state, type ="scatter",mode ="lines")
Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
Returning the palette you asked for with that many colors
Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
Returning the palette you asked for with that many colors
cv_states %>%filter(state=="Florida") %>%plot_ly(x =~date, y =~new_cases,type ="scatter", mode ="lines") %>%add_trace(x =~date, y =~new_deaths, type ="scatter", mode ="lines")
# Create a second heatmap after filtering to only include dates every other weekfilter_dates <-seq(as.Date("2021-06-15"), as.Date("2021-11-01"), by=14)cv_states_mat <- cv_states %>%select(state, date, newper100k) %>%filter(date %in% filter_dates)cv_states_mat2 <-as.data.frame(pivot_wider(cv_states_mat, names_from = state,values_from = newper100k))rownames(cv_states_mat2) <- cv_states_mat2$datecv_states_mat2$date <-NULLcv_states_mat2 <-as.matrix(cv_states_mat2)# Create a heatmap using plot_ly()plot_ly(x=colnames(cv_states_mat2), y=rownames(cv_states_mat2),z=~cv_states_mat2,type="heatmap",showscale=T)
10.
### For specified datepick.date ="2021-10-15"# Extract the data for each state by its abbreviationcv_per100 <- cv_states %>%filter(date==pick.date) %>%select(state, abb, newper100k, cases, deaths) # select datacv_per100$state_name <- cv_per100$statecv_per100$state <- cv_per100$abbcv_per100$abb <-NULL# Create hover textcv_per100$hover <-with(cv_per100, paste(state_name, '<br>', "Cases per 100k: ", newper100k, '<br>', "Cases: ", cases,'<br>', "Deaths: ", deaths))# Set up mapping detailsset_map_details <-list(scope ='usa',projection =list(type ='albers usa'),showlakes =TRUE,lakecolor =toRGB('white'))# Make sure both maps are on the same color scaleshadeLimit <-125# Create the mapfig <-plot_geo(cv_per100, locationmode ='USA-states') %>%add_trace(z =~newper100k, text =~hover, locations =~state,color =~newper100k, colors ='Purples' )fig <- fig %>%colorbar(title =paste0("Cases per 100k: ", pick.date),limits =c(0,shadeLimit))fig <- fig %>%layout(title =paste('Cases per 100k by State as of ', pick.date, '<br>(Hover for value)'),geo = set_map_details)fig_pick.date <- fig################ Map for today's date# Extract the data for each state by its abbreviationcv_per100 <- cv_states_today %>%select(state, abb, newper100k, cases, deaths) # select datacv_per100$state_name <- cv_per100$statecv_per100$state <- cv_per100$abbcv_per100$abb <-NULL# Create hover textcv_per100$hover <-with(cv_per100, paste(state_name, '<br>', "Cases per 100k: ", newper100k, '<br>', "Cases: ", cases,'<br>', "Deaths: ", deaths))# Set up mapping detailsset_map_details <-list(scope ='usa',projection =list(type ='albers usa'),showlakes =TRUE,lakecolor =toRGB('white'))# Create the mapfig <-plot_geo(cv_per100, locationmode ='USA-states') %>%add_trace(z =~newper100k, text =~hover, locations =~state,color =~newper100k, colors ='Purples' )fig <- fig %>%colorbar(title =paste0("Cases per 100k: ", Sys.Date()), limits =c(0,shadeLimit))fig <- fig %>%layout(title =paste('Cases per 100k by State as of', Sys.Date(), '<br>(Hover for value)'),geo = set_map_details)fig_Today <- fig### Plot together subplot(fig_pick.date, fig_Today, nrows =2, margin = .05)